#! /usr/bin/env python

"""Generate some reports that are stored in the ``_reports/`` directory.

Those are used for running tests or visually checking the output.
"""

if __name__ == "__main__":
    import datetime
    import pathlib

    import numpy as np
    import pandas as pd

    from skrub import TableReport, datasets

    # Generated files go in a ``_reports`` folder located beside this script,
    # independently of the working directory the script is launched from.
    reports_dir = pathlib.Path(__file__).resolve().with_name("_reports")
    reports_dir.mkdir(exist_ok=True)

    #
    # "Vanilla" employee salaries dataset
    # -----------------------------------
    #

    # Report built from the dataset features with the default TableReport settings.
    salaries = datasets.fetch_employee_salaries().X
    salaries_path = reports_dir / "employee_salaries.html"
    salaries_path.write_text(TableReport(salaries).html(), encoding="UTF-8")

    #
    # Small df with col names & values that facilitate testing keyboard navigation
    # ----------------------------------------------------------------------------
    #

    # Tiny table (7 rows, 4 columns) with short, predictable names and values:
    # integer columns alternate with "v NN" string columns.
    n_rows = 7
    rows = range(n_rows)
    mini_columns = {
        "c 0": [*rows],
        "c 1": [f"v 0{i}" for i in rows],
        "c 2": [i + 10 for i in rows],
        "c 3": [f"v {i + 10}" for i in rows],
    }
    df = pd.DataFrame(mini_columns)
    # Render the small table and save the page as ``mini.html``.
    mini_path = reports_dir / "mini.html"
    mini_path.write_text(TableReport(df).html(), encoding="UTF-8")

    #
    # Example with column values in different writing systems
    # -------------------------------------------------------
    #
    # This is only for checking visually that they display correctly, in
    # particular the (elided) strings in the matplotlib axis labels.
    # See https://github.com/skrub-data/skrub/pull/1097
    #

    # One short and one long sample per writing system, gathered in a single
    # text column. The long samples are the ones that end up elided when used
    # as axis labels.
    chinese = [
        "程序设计",
        (
            "在计算机技术发展的早期，軟體開發主要就是程序设计。但随着技术的发展，"
            "软件系统越来越复杂，逐渐分化出许多专用的软件系统"
        ),
    ]
    arabic = [
        "برمجة",
        (
            "البرمجة هي عملية كتابة تعليمات وتوجيه أوامر لجهاز"
            " الحاسوب أو أي جهاز آخر مثل قارئات"
        ),
    ]
    emoji = [
        "🖥️💻🖱️🖨️📄📝🔢📊📌🤖",
        (
            "🖥️💻🖱️🖨️📄👨‍💻👩‍💻📝🔢📊📌🤖🌐🔧🛠️👨‍🔬👩‍🔬💡📚⚙️🛡️🕹️"
            "🧠👩‍💼🖥️💻🖱️🖨️📄👨‍💻👩‍💻📝🔢📊📌🤖🌐🔧🛠️👨‍🔬👩‍🔬💡📚⚙️🛡️🕹️🧠👩‍💼"
        ),
    ]
    english = [
        "Computer programming",
        (
            "Computer programming or coding is the composition of sequences"
            " of instructions, called programs, that computers can follow to"
        ),
    ]
    df = pd.DataFrame({"Computer programming": chinese + arabic + emoji + english})
    # Render the multi-script table and save the page as ``unicode.html``.
    unicode_path = reports_dir / "unicode.html"
    unicode_path.write_text(TableReport(df).html(), encoding="UTF-8")

    #
    # Pandas MultiIndex
    # -----------------
    #
    # Example from https://pandas.pydata.org/docs/user_guide/reshaping.html#pivot-table
    #

    # Long-format input table: 24 rows of repeating category labels, two columns
    # of random floats and one datetime column (1st then 15th of each 2013 month).
    n_rows = 24
    months = range(1, 13)
    first_days = [datetime.datetime(2013, month, 1) for month in months]
    mid_days = [datetime.datetime(2013, month, 15) for month in months]
    long_table = pd.DataFrame(
        {
            "A": ["one", "one", "two", "three"] * 6,
            "B": ["A", "B", "C"] * 8,
            "C": (["foo"] * 3 + ["bar"] * 3) * 4,
            "D": np.random.randn(n_rows),
            "E": np.random.randn(n_rows),
            "F": first_days + mid_days,
        }
    )

    # Two index keys plus a list of aggregations: the pivoted table has
    # hierarchical labels on both its rows and its columns.
    df = long_table.pivot_table(
        values="E",
        index=["B", "C"],
        columns=["A"],
        aggfunc=["sum", "mean"],
    )

    # Render the pivoted table and save the page as ``multi_index.html``.
    multi_index_path = reports_dir / "multi_index.html"
    multi_index_path.write_text(TableReport(df).html(), encoding="UTF-8")

    #
    # Reports with different open_tab settings for testing
    # -----------------------------------------------------
    #

    df = datasets.fetch_employee_salaries().X

    # One report per ``open_tab`` value, each written to its own file.
    tab_outputs = {
        "stats": "open_tab_stats.html",
        "distributions": "open_tab_distributions.html",
        "associations": "open_tab_associations.html",
    }
    for tab, filename in tab_outputs.items():
        html = TableReport(df, open_tab=tab).html()
        (reports_dir / filename).write_text(html, encoding="UTF-8")
